## ## ## ## ## ## ## ## ## ## ## ##
## ## ## ## ## ## ## ## ## ## ## ##
## ## ## ## ## ## ## ## ## ## ## ##
          ## This is the replication data for paper "GRASSROOTS ECONOMIC INTERDEPEDENCE: EVIDENCE FROM U.S.-CHINA HIGHER EDUCATION PARTNERSHIPS" ##
          ## Table and Figure ##
## ## ## ## ## ## ## ## ## ## ## ##
## ## ## ## ## ## ## ## ## ## ## ##
## ## ## ## ## ## ## ## ## ## ## ##

rm(list=ls())

rstudioapi::getActiveDocumentContext
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))



        #### libraries####


library(rstudioapi)
library(data.table)
library(dplyr)
library(ggplot2)
library(lfe)
library(stargazer)
library(texreg)
library(brglm)
library(brglm2)
library(sandwich)
library(lmtest)
library(MASS)


## load data
load("rep_tables_and_figures.RData")

# coopnew: US universities with Semi-official Joint Degree Program (Semi-official JDP)
    # UNITID: identification of US university
    # JDP: Semi-official JDP with China
# dt0: University Ranking Data matched with JDP universities (world level, 2004-2009 THE-QS Data, after 2009, QS ranking data.)
    # kongstatus: whether this university host a Confucius Institute
    # jointstatus: whether this university host a Semi-official JDP
    # qs:qs500
    # pscore_avg: political score data - p4v
    # Country
# expand：panel dataset 
    # skongstatus: whether this university host a Confucius Institute
    # generalJDP: whether this university host a Semi-official JDP
    # STABBR：US state
    # year
    # UGDS1: undergraduates
    # UGDS_ASIAN: Asian students
    # UNITID: University
# expand0：expand + CollegeScorecard data
    # skongstatus: whether this university host a Confucius Institute
    # generalJDP: whether this university host a Semi-official JDP
    # STABBR：US state
    # year
    # UGDS1: undergraduates
    # UGDS_ASIAN: Asian students
    # UNITID: University
    # STEM: #PCIP49 + PCIP48 + PCIP47 + PCIP46 + PCIP41 + PCIP40 + PCIP30 + PCIP27 + PCIP15 + PCIP14 + PCIP11 + PCIP10 + PCIP04 + PCIP03 + PCIP01
    # Foreign Language: PCIP16
# newrankdata1US: 
    # kongstatus: whether this university host a Confucius Institute
    # jointstatus0：whether this university host a Fully-approved JDP
    # jointstatus：whether this university host a Semi-official JDP
    # STABBR：US state
    # year
    # UGDS1: undergraduates
    # UGDS_ASIAN: Asian students
    # UNITID: University
    # earlest: the year of the earliest Fully-approved JDP
    # uni_type: Public, Private Non-Profit
    # UGDS: number of the enrollment
    # setRank: US news ranking
    # POPPCT_URBAN: urban rate
# use: US dataset for CI and Fully-approved JDP
    # kongstatus: whether this university host a Confucius Institute
    # jointstatus0：whether this university host a Fully-approved JDP
    # jointstatus：whether this university host a Semi-official JDP
    # 中方大学: CI's host university in China
    # 省份: CI's host province in China
# use_real: international dataset for CI and Fully-approved JDP 
    # kongstatus: whether this university host a Confucius Institute
    # jointstatus0：whether this university host a Fully-approved JDP
    # jointstatus：whether this university host a Semi-official JDP
    # 中方大学: CI's host university in China
    # 省份: CI's host province in China
    # college_in_china：Fully-approved JDP's host university in China
    # province: Fully-approved JDP's host province in China
#==============================================================================#
#==============================                        ========================#
#==============================   Main Tables/Figures  ========================#
#==============================                        ========================#
#==============================================================================#


        #### Table 1 ####


# CI yes/no
newrankdata1US$CI <- ifelse(newrankdata1US$kongstatus == 1, "CI yes", "CI no")
fmt_col <- function(x){
  n_yes <- sum(x == "CI yes", na.rm=TRUE)
  n_no  <- sum(x == "CI no",  na.rm=TRUE)
  n_tot <- n_yes + n_no
  c(
    sprintf("%d (%.1f%%)", n_yes, 100*n_yes/n_tot),
    sprintf("%d (%.1f%%)", n_no,  100*n_no/n_tot),
    sprintf("%d", n_tot)
  )
}

fully <- fmt_col(newrankdata1US$CI[newrankdata1US$jointstatus0 == 1])  # Fully-approved
semi  <- fmt_col(newrankdata1US$CI[newrankdata1US$jointstatus  == 1])  # Semi-official
none  <- fmt_col(newrankdata1US$CI[newrankdata1US$jointstatus  == 0])  # No JDP

Table1 <- data.frame(
  `Fully-approved JDPs` = fully,
  `Semi-official JDPs`  = semi,
  `No JDP`              = none,
  row.names = c("CI yes","CI no","Total"),
  check.names = FALSE
)

## Table1
#  Fully-approved JDPs Semi-official JDPs        No JDP
#  CI yes          25 (18.5%)         102 (8.2%)      0 (0.0%)
#  CI no          110 (81.5%)       1143 (91.8%) 3755 (100.0%)
#  Total                  135               1245          3755


        #### Figure 1 ####


# 1) Keep one row per UNITID (and keep the year variables you used)
tu <- unique(newrankdata1US[, c("UNITID", "earlest", "start_year")])
setDT(tu)

# 2) Merge JDP year from coopnew
tu <- merge(tu, coopnew[, c("UNITID", "JDP")], by = "UNITID", all.x = TRUE)

# 3) Stack three "event-year" sources into one long table
events <- rbindlist(list(
  data.table(year = tu$earlest,    Cooperation = "fully-approved JDPs"),
  data.table(year = tu$start_year, Cooperation = "Confucius Institutes"),
  data.table(year = tu$JDP,        Cooperation = "semi-approved JDPs")
), use.names = TRUE, fill = TRUE)

# 4) Clean: drop missing years; convert to numeric
events <- events[!is.na(year)]
events[, year := as.numeric(year)]
events <- events[!is.na(year)]

# 5) Count number of universities by year and cooperation type
a <- events[, .(tutu_num = .N), by = .(year, Cooperation)]

# 6) Fill missing year-type combinations with 0 (same as your full_grid + left_join)
all_years <- seq(min(a$year), max(a$year))
all_types <- unique(a$Cooperation)
a_full <- CJ(year = all_years, Cooperation = all_types)[a, on = .(year, Cooperation)]
a_full[is.na(tutu_num), tutu_num := 0]

# 7) Plot (same mapping/labels/colors as your working plot)
ggplot(a_full, aes(x = year, y = tutu_num, fill = Cooperation)) +
  geom_col(
    position = position_dodge2(width = 0.8, preserve = "single"),
    width = 0.8
  ) +
  scale_x_continuous(breaks = seq(min(a_full$year), max(a_full$year), by = 2)) +
  labs(x = "Year", y = "Number") +
  scale_fill_manual(values = c(
    "Confucius Institutes" = "#1b9e77",
    "fully-approved JDPs"  = "#d95f02",
    "semi-approved JDPs"   = "#7570b3"
  )) +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "top",
    axis.title = element_text(face = "bold")
  )+
  theme(
    panel.grid.major = element_blank(),  # 去掉主网格
    panel.grid.minor = element_blank(),  # 去掉次网格
    panel.background = element_blank(),  # 去掉背景
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "top"
  )


        #### Table 2 ####


fit <- felm(skongstatus ~  generalJDP |STABBR + year |0|STABBR, data=expand) #

fit0 <- felm(skongstatus ~  generalJDP  + UGDS1 + UGDS_ASIAN |STABBR + year |0|STABBR, data=expand)

fit1 <- felm(skongstatus ~  generalJDP |UNITID + year |0|UNITID, data=expand)

fit2 <- felm(skongstatus ~  generalJDP  + UGDS1 + UGDS_ASIAN |UNITID + year |0|UNITID, data=expand)

add_lines_safe <- list(
  as.character(c("Control Variables",       "No",  "Yes", "No",  "Yes")),
  as.character(c("State Fixed Effects",     "Yes", "Yes", "No",  "No")),
  as.character(c("University Fixed Effect", "No",  "No",  "Yes", "Yes")),
  as.character(c("Year Fixed Effect",       "Yes", "Yes", "Yes", "Yes"))
)

stargazer(
  fit, fit0, fit1, fit2,
  type = "latex",
  title = "Table 2: The Effect of Semi-official JDPs on the Establishment of Confucius Institutes in America ",
  covariate.labels = c("Semi-official JDP", "Undergraduate", "UG_asian"),
  dep.var.caption  = "Dependent Variables",
  dep.var.labels   = "Establishment of Confucius Institutes",
  omit.stat = c("f", "ser", "rsq"),   # optional but recommended for logit
  no.space = TRUE,
  notes = "∗p<0.1; ∗∗p<0.05; ∗∗∗p<0.01. The control variables include annual student size and Asian student proportions. Robust standard errors are clustered at the state (column 1 and 2) or university (column 3 and 4) levels. ",
  add.lines = list(c("Control Variables", "No", "Yes", "No","Yes"),
                             c("State Fixed Effects", "Yes","Yes", "No","No"),
                             c("University Fixed Effect",  "No","No", "Yes","Yes"),
                            c("Year  Fixed Effect", "Yes","Yes", "Yes","Yes"))
)

## Table 2: The Effect of Semi-official JDPs on the Establishment of Confucius Institutes in America
# Dependent Variables
# Establishment of Confucius Institutes
#                          (1)        (2)       (3)       (4)
# Semi-official JDP      0.085∗∗∗    0.061∗∗∗  0.037∗∗∗  0.036∗∗∗
#                        (0.007)	    (0.007)   (0.002) 	(0.007)
# Control Variable         No          Yes       No        Yes
# State Fixed Effects      Yes         Yes       No        No
# University Fixed Effect  No          No        Yes       Yes
# Year Fixed Effect        Yes         Yes       Yes       Yes
# Observations            60,000      54,496    60,000    54,496
# Adjusted R2             0.077       0.125     0.665     0.667


        #### Table 3 ####


# --- Public (uni_type == 1)
m1 <- felm(skongstatus ~ generalJDP | UNITID + year | 0 | UNITID,
           data = expand[uni_type == 1])
m2 <- felm(skongstatus ~ generalJDP + UGDS1 + UGDS_ASIAN | UNITID + year | 0 | UNITID,
           data = expand[uni_type == 1])

# --- Private Non-Profit (uni_type == 2)
m3 <- felm(skongstatus ~ generalJDP | UNITID + year | 0 | UNITID,
           data = expand[uni_type == 2])
m4 <- felm(skongstatus ~ generalJDP + UGDS1 + UGDS_ASIAN | UNITID + year | 0 | UNITID,
           data = expand[uni_type == 2])
#
# Compute median of UGDS (ignoring missing values)
ugds_med <- median(expand$UGDS, na.rm = TRUE)

# --- Large (UGDS >= ugds_med)
m5 <- felm(skongstatus ~ generalJDP | UNITID + year | 0 | UNITID,
           data = expand[UGDS >= ugds_med])
m6 <- felm(skongstatus ~ generalJDP + UGDS1 + UGDS_ASIAN | UNITID + year | 0 | UNITID,
           data = expand[UGDS >= ugds_med])

# --- Small (UGDS < ugds_med)
m7 <- felm(skongstatus ~ generalJDP | UNITID + year | 0 | UNITID,
           data = expand[UGDS < ugds_med])
m8 <- felm(skongstatus ~ generalJDP + UGDS1 + UGDS_ASIAN | UNITID + year | 0 | UNITID,
           data = expand[UGDS < ugds_med])

add_lines <- list(
  c("Control Variables",         "No","Yes","No","Yes","No","Yes","No","Yes"),
  c("University Fixed Effect",   "Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes"),
  c("Year Fixed Effect",         "Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes")
)

stargazer(
  m1, m2, m3, m4, m5, m6, m7, m8,
  type = "latex",
  title = "Table 3: Effect of Semi-official JDP on Establishment of CI by American University Types",
  dep.var.caption = "Dependent Variables",
  dep.var.labels  = "Establishment of Confucius Institutes",
  column.labels   = c("Public", "Private Non-Profit", "Large", "Small"),
  column.separate = c(2, 2, 2, 2),
  covariate.labels = c("Semi-official JDP", "Undergraduate", "UG_asian"),
  omit.stat = c("f", "ser", "rsq"),
  no.space = TRUE,
  column.sep.width = "-10pt",
  add.lines = add_lines,
  notes = paste0(
    "*p<0.1; **p<0.05; ***p<0.01. ",
    "The control variables include annual student size (UGDS1) and Asian student proportions (UGDS\\_ASIAN). ",
    "Standard errors are clustered at the university (UNITID) level. ",
    "Large and Small are split by UGDS threshold (UGDS >= 1.060 vs. UGDS < 1.060). ",
    "Private for-profit universities are omitted in Panel A if they have no Confucius Institutes."
  )
)

## Table 3: Effect of Semi-official JDP on Establishment of CI by American University Types
#                                                         Dependent Variables
#                                               Establishment of Confucius Institutes
#                                 Public          Private Non-Profit          Large               Small
#                            (1)        (2)        (3)         (4)        (5)       (6)       (7)       (8)
# Semi-official JDP       0.059∗∗∗    0.057∗∗∗    0.016**    0.016**    0.036***   0.036***   0.012      0.012
#                          (0.012)     (0.012)     (0.007)    (0.007)    (0.008)    (0.008)   (0.008)   (0.008)
# Control Variable           No           Yes         No         Yes        No        Yes        No        Yes  
# University Fixed Effect    Yes          Yes         Yes        Yes        Yes       Yes        Yes       Yes
# Year Fixed Effect          Yes          Yes         Yes        Yes        Yes       Yes        Yes       Yes
# Observations              21,672      21,287      17,928      17,235     27,259   27,259      27,237    27,237
# Adjusted R2               0.669       0.674       0.650       0.649       0.665   0.666       0.717     0.717


        #### Table 4 ####


fit0 <- felm(kongstatus ~  jointstatus  |  0 |0|0, data=dt0)
fit1 <- felm(kongstatus ~  jointstatus + qs |  Country |0|Country, data=dt0)
fit2 <- felm(kongstatus ~  jointstatus + qs + pscore_avg  |  0 |0|0, data=dt0)
fit3 <- felm(kongstatus ~  jointstatus|  0 |0|0, data=dt0[ Country != "UnitedStates"])
fit4 <- felm(kongstatus ~  jointstatus + qs| Country |0|Country, data=dt0[ Country != "UnitedStates"])
fit5 <- felm(kongstatus ~  jointstatus + qs + pscore_avg |  0 |0|0, data=dt0[ Country != "UnitedStates"])


stargazer(fit0, fit1, fit2, fit3,fit4, fit5, type ="latex",
          label="State", omit.stat = c("f", "ser"),  no.space=TRUE,column.sep.width = "-15pt", 
          notes = "state standard errors",   
          add.lines = list(  c("State Fixed Effects", "N","Y","N","N","Y","N")), 
          title = "Effect of JDPs on Establishment of Confucius Institutes  ", 
          covariate.labels = c("Fully-approved JDP","QS Ranking", "Democracy Index", "Constant"),
          dep.var.caption = c("Dependent Variables"),
          dep.var.labels = c("Establishment of Confucius Institutes"), column.separate = c(1,1,1))

## Table 4: The Effect of Fully-approved JDPs on Confucius Institutions at the global level
#                                                         Dependent Variables
#                                               Establishment of Confucius Institutes
#                                        Worldwide                             Excluding US
#                                 (1)         (2)         (3)         (4)         (5)         (6)
# Fully-approved JDP            0.145∗∗∗    0.109∗∗∗    0.150∗∗∗    0.146∗∗∗    0.105∗    0.156∗∗∗
#                               (0.025)     (0.038)     (0.027)     (0.027)     (0.056)   (0.029)
# QS Ranking                                0.039        0.022                  0.051∗∗   0.024
#                                           (0.023)     (0.021)                 (0.025)   (0.022)
# Democracy Index                                       −0.004                            −0.005∗∗
#                                                       (0.003)                           (0.003)
# Constant                      0.103∗∗∗                0.128∗∗∗    0.094∗∗∗              0.126∗∗∗
#                               (0.011)                 (0.024)     (0.011)               (0.024)
# State FE                        No          Yes         No          No          Yes       No
# Observations                  1,142       1,142       1,083         969         969       910
# Adjusted R2                   0.027       0.091       0.029       0.028       0.115     0.032


#==============================================================================#
#=============================               ==================================#
#=============================   APPENDIX    ==================================#
#=============================               ==================================#
#==============================================================================#


        #### Table A1 ####


stat_row <- function(x){
  x <- x[!is.na(x)]
  c(
    Observation = length(x),
    Mean = mean(x),
    SD = sd(x),
    Min = min(x),
    Max = max(x)
  )
}

ti <- bind_rows(
  data.frame(Statistic="US News Ranking", t(stat_row(newrankdata1US$setRank))),
  data.frame(Statistic="University Type", t(stat_row(as.numeric(as.character(newrankdata1US$uni_type)))))
)

tv <- bind_rows(
  data.frame(Statistic="Students Number (thousand)", t(stat_row(expand$UGDS1))),
  data.frame(Statistic="Asian Students Rates",       t(stat_row(expand$UGDS_ASIAN))),
  data.frame(Statistic="Urban Percentage",           t(stat_row(expand$POPPCT_URBAN))),
  data.frame(Statistic="Confucius Institutes",       t(stat_row(expand$skongstatus))),
  data.frame(Statistic="Semi-official JDP",          t(stat_row(expand$jointstatus))),
  data.frame(Statistic="Fully-approved JDP",         t(stat_row(expand$jointstatus0)))
)

A1 <- bind_rows(
  data.frame(Statistic="Time-Invariant Variables", Observation=NA, Mean=NA, SD=NA, Min=NA, Max=NA),
  ti,
  data.frame(Statistic="Time-Varying Variables", Observation=NA, Mean=NA, SD=NA, Min=NA, Max=NA),
  tv
)

A1_fmt <- A1 %>%
  mutate(
    Observation = ifelse(is.na(Observation), "", formatC(as.integer(Observation), format="d", big.mark=",")),
    Mean = ifelse(is.na(Mean), "", formatC(as.numeric(Mean), format="f", digits=3)),
    SD   = ifelse(is.na(SD),   "", formatC(as.numeric(SD),   format="f", digits=3)),
    Min  = ifelse(is.na(Min),  "", formatC(as.numeric(Min),  format="f", digits=3)),
    Max  = ifelse(is.na(Max),  "", formatC(as.numeric(Max),  format="f", digits=3))
  )

A1_fmt

## Table A1: Summary Statistics 
#                      Statistic    Observation     Mean       SD       Min        Max
#       Time-Invariant Variables                                         
#                US News Ranking       5,000       -0.930     0.322   -1.000      1.000
#                University Type       5,000        1.979     0.837    1.000      3.000
#         Time-Varying Variables                                         
#     Students Number (thousand)      54,496        3.182     5.601    0.000     151.558
#           Asian Students Rates      60,000        0.027     0.065    0.000      1.000
#               Urban Percentage      59,952        82.915    22.088   0.000     100.000
#           Confucius Institutes      60,000        0.012     0.109    0.000      1.000
#              Semi-official JDP      60,000        0.249     0.432    0.000      1.000
#             Fully-approved JDP      60,000        0.027     0.162    0.000      1.000


        #### Table A2 ####


# (1) State FE + Year FE, cluster at state
fit  <- felm(skongstatus ~ generalJDP | STABBR + year | 0 | STABBR, data = expand0)

# (2) University FE + Year FE, NO clustering (as in your code)
fit1 <- felm(skongstatus ~ generalJDP | UNITID + year | 0 | 0, data = expand0)

# (3) University FE + Year FE + controls, cluster at university
fit2 <- felm(skongstatus ~ generalJDP + UGDS1 + UGDS_ASIAN + STEM + PCIP16 |
               UNITID + year | 0 | UNITID, data = expand0)

stargazer(
  fit, fit1, fit2,
  type = "latex",
  title = "Table A2: Effect of Semi-official JDP on Establishment of Confucius Institutes",
  dep.var.caption = "Dependent Variables",
  dep.var.labels  = "Establishment of Confucius Institutes",
  
  covariate.labels = c("Semi-official JDP",
                       "Student Numbers",
                       "Asian Students",
                       "STEM",
                       "Foreign Language"),
  
  omit.stat = c("f", "ser", "rsq"),
  no.space = TRUE,
  column.sep.width = "-12pt",
  
  add.lines = list(
    c("State Fixed Effect",      "Yes", "No",  "No"),
    c("University Fixed Effect", "No",  "Yes", "Yes"),
    c("Year Fixed Effect",       "Yes", "Yes", "Yes")
  ),
  
  notes = paste0(
    "*p<0.1; **p<0.05; ***p<0.01. ",
    "Standard errors are clustered at the state level in column (1) and at the university level in column (3). ",
    "Data Source: Authors’ own database. Student Numbers, Asian Students Rates, STEM, and Foreign Language are summarized from https://collegescorecard.ed.gov/data/documentation/."
  )
)

## Table A2: Effect of Semi-official JDP on Establishment of Confucius Institutes
#                         Dependent Variables
#                         Establishment of Confucius Institutes
#                         (1)            (2)            (3)
# Semi-official JDP     0.085∗∗∗      0.037∗∗∗        0.037∗∗∗
#                       (0.007)       (0.002)         (0.007)
# Student Numbers                                     0.002∗∗∗
#                                                     (0.001)
# Asian Students                                      0.017
#                                                     (0.012)
# STEM                                                −0.0004
#                                                     (0.003)
# Foreign Language                                    −0.076
#                                                     (0.056)
#
# State Fixed Effect      Yes            No             No
# University Fixed Effect No             Yes            Yes
# Year Fixed Effect       Yes            Yes            Yes
# Observations           60,000        60,000          52,255
# Adjusted R2            0.077          0.665           0.667


        #### Table A3 ####

# 0) Create variables used in Table A3

# Student numbers in thousands (your table uses "Student Numbers")
newrankdata1US[, total_avg1 := total_avg / 1000]

# Ensure uni_type is a factor with Public as the baseline (IMPORTANT)
# You need to confirm your coding:
# e.g., 1=Public, 2=Private Nonprofit, 3=Private Forprofit


# 1) Models (1)-(5)

# (1) No controls, no FE
m1 <- felm(kongstatus ~ jointstatus | 0 | 0 | 0, data = newrankdata1US)

# (2) Add US News Ranking (no FE)
m2 <- felm(kongstatus ~ jointstatus + setRank | 0 | 0 | 0, data = newrankdata1US)

# (3) Add state fixed effects, cluster at state
m3 <- felm(kongstatus ~ jointstatus + setRank | STABBR | 0 | STABBR, data = newrankdata1US)

# (4) Add Democrat, keep state FE + state cluster
m4 <- felm(kongstatus ~ jointstatus + setRank + democrat | STABBR | 0 | STABBR, data = newrankdata1US)

# (5) Full controls, keep state FE + state cluster
# Note: factor(uni_type) will generate two dummies (Private Nonprofit / Private Forprofit), Public is baseline.
m5 <- felm(
  kongstatus ~ jointstatus + setRank + democrat +
    factor(uni_type) + total_avg1 + ug_asian + POPPCT_URBAN |
    STABBR | 0 | STABBR,
  data = newrankdata1US
)

# 2) Stargazer output (Table A3)

stargazer(
  m1, m2, m3, m4, m5,
  type = "latex",
  title = "Table A3: Cross-section Analysis with Semi-official JDP",
  dep.var.caption = "Dependent Variables",
  dep.var.labels  = "Establishment of Confucius Institutes",
  
  covariate.labels = c(
    "Semi-official JDP",
    "US News Ranking",
    "Democrat",
    "Private Nonprofit",
    "Private Forprofit",
    "Student Numbers",
    "Asian Students",
    "Urban Percentage"
  ),
  
  # keep N and Adjusted R2, drop F and SER
  omit.stat = c("f", "ser"),
  
  no.space = TRUE,
  column.sep.width = "-12pt",
  
  add.lines = list(
    c("State Fixed Effect", "No", "No", "Yes", "Yes", "Yes")
  ),
  
  notes = "*p<0.1; **p<0.05; ***p<0.01. Standard errors are clustered at the state level."
)

## Table A3: Cross-section Analysis with Semi-official JDP
#                               Dependent Variables
#                             Establishment of Confucius Institutes
#                       (1)       	(2)       	(3)       	(4)       	(5)
# Semi-official JDP   0.082∗∗     0.070∗∗∗    0.071∗∗∗    0.069∗∗∗    0.040∗
#                     (0.004)     (0.005)     (0.008)     (0.008)     (0.008)
# US News Ranking                 0.066∗∗∗    0.068∗∗∗    0.071∗∗∗    0.049∗∗∗
#                                 (0.006)     (0.014)     (0.014)     (0.015)
# Democrat                                                0.058∗∗∗    0.037∗∗
#                                                         (0.015)     (0.017)
# Private Nonprofit                                                   −0.018*
#                                                                     (0.009)
# Private Forprofit                                                   0.0003  
#                                                                     (0.010)
# Student Numbers                                                     0.006∗∗∗
#                                                                     (0.001)
# Asian Students                                                      0.051
#                                                                     (0.033)
# Urban Percentage                                                    0.0001
#                                                                     (0.0001)
# Constant             -0.000      0.064∗∗∗                         
#                      (0.002)     (0.006)
# State Fixed Effect                              Yes        Yes        Yes
# Observations          5,000      5,000         5,000       4,672      4,485
# Adjusted R2           0.063      0.083         0.083       0.088      0.172


        #### Table A4 ####


# Panel A: coefficients (log-odds) with clustered SE
# Panel B: predicted probabilities and first differences


# 0) Prepare data (use your panel dataset: expand)

dat <- expand %>%
  mutate(
    skongstatus = as.integer(skongstatus),
    generalJDP  = as.integer(generalJDP),
    UGDS1       = as.numeric(UGDS1),
    UGDS_ASIAN  = as.numeric(UGDS_ASIAN),
    year        = factor(year),
    STABBR      = as.character(STABBR),
    UNITID      = as.character(UNITID)
  )


# 1) Build "uncensored" FE codes (drop all-zero FE groups)
#    - statecode_un: state FE indicators excluding all-zero states
#    - univcode_un:  univ FE indicators excluding all-zero universities

make_uncensor_fe <- function(dep_var, dataset, fe_var, prefix){
  
  # mean(dep) within FE group
  y_panel <- aggregate(dataset[[dep_var]],
                       by = list(dataset[[fe_var]]),
                       FUN = mean, na.rm = TRUE)
  
  names(y_panel) <- c(fe_var, dep_var)
  
  # all-zero group => censor = 1
  y_panel$censor <- ifelse(y_panel[[dep_var]] == 0, 1, 0)
  
  # attach censor back to each row
  dataset[[paste0(prefix, "_censor")]] <- y_panel$censor[
    match(dataset[[fe_var]], y_panel[[fe_var]])
  ]
  
  # uncensor indicator
  dataset$uncensor <- 1 - dataset[[paste0(prefix, "_censor")]]
  
  # create numeric FE code
  dataset$code <- as.integer(factor(dataset[[fe_var]]))
  
  # "uncensored FE code"
  dataset[[paste0(prefix, "code_un")]] <- dataset$code * dataset$uncensor
  
  dataset
}


dat_s <- make_uncensor_fe("skongstatus", dat, fe_var = "STABBR", prefix = "state")
dat_u <- make_uncensor_fe("skongstatus", dat, fe_var = "UNITID", prefix = "univ")


# 2) Fit 4 bias-reduced logistic models (brglm)
#    (1)(2): state FE + year FE
#    (3)(4): univ FE  + year FE

m1 <- brglm(
  skongstatus ~ generalJDP + factor(statecode_un) + year,
  family = binomial("logit"),
  method = "brglm.fit",
  p1 = TRUE,
  data = dat_s
)

m2 <- brglm(
  skongstatus ~ generalJDP + UGDS1 + UGDS_ASIAN + factor(statecode_un) + year,
  family = binomial("logit"),
  method = "brglm.fit",
  p1 = TRUE,
  data = dat_s
)

m3 <- brglm(
  skongstatus ~ generalJDP + factor(univcode_un) + year,
  family = binomial("logit"),
  method = "brglm.fit",
  p1 = TRUE,
  data = dat_u
)

m4 <- brglm(
  skongstatus ~ generalJDP + UGDS1 + UGDS_ASIAN + factor(univcode_un) + year,
  family = binomial("logit"),
  method = "brglm.fit",
  p1 = TRUE,
  data = dat_u
)


# 3) Cluster-robust VCOV (align clusters with model-used rows)
#    (1)(2): cluster at state; (3)(4): cluster at university

get_used_rows <- function(model) as.integer(rownames(model.frame(model)))

# (1) state cluster
rows1 <- get_used_rows(m1)
vc1 <- vcovCL(m1, cluster = dat_s$STABBR[rows1], type = "HC1")
se1 <- sqrt(diag(vc1))

# (2) state cluster
rows2 <- get_used_rows(m2)
vc2 <- vcovCL(m2, cluster = dat_s$STABBR[rows2], type = "HC1")
se2 <- sqrt(diag(vc2))

# (3) univ cluster
rows3 <- get_used_rows(m3)
vc3 <- vcovCL(m3, cluster = dat_u$UNITID[rows3], type = "HC1")
se3 <- sqrt(diag(vc3))

# (4) univ cluster
rows4 <- get_used_rows(m4)
vc4 <- vcovCL(m4, cluster = dat_u$UNITID[rows4], type = "HC1")
se4 <- sqrt(diag(vc4))



# 4) Panel A output (brglm coefficients with clustered SE)
#    Use texreg/screenreg; replace SE by override.se

screenreg(
  list(m1, m2, m3, m4),
  override.se = list(se1, se2, se3, se4),
  stars = c(0.1, 0.05, 0.01),
  custom.model.names = c("(1)", "(2)", "(3)", "(4)")
)

##   Table A4: Robustness checks using Rare Events Logistic Regression models
# Panel A: Rare Events Logistic Regression
#                                   Dependent Variables
#                         Establishment of Confucius Institutes
#                           (1)         (2)         (3)         (4)     
# Semi-official JDP        9.16 ***   13.53 ***    -0.10       -0.12
#                           (1.38)     (2.23)       (0.89)      (0.87)
# Control Variable                      Yes                     Yes
# State Fixed Effect       Yes          Yes
# University Fixed Effect                            Yes          Yes
# Year Fixed Effect        Yes          Yes          Yes          Yes
# Observations             60,000     54,495        60,000	    54,495


# 5) Panel B: predicted probabilities & first differences
#    Simulate coefficients from clustered VCOV to get s.e.

compute_fd <- function(model, vcov_mat, varname = "generalJDP",
                       x0 = 0, x1 = 1, n_sim = 1000) {
  
  b <- coef(model)
  S <- mvrnorm(n = n_sim, mu = b, Sigma = vcov_mat)
  
  X <- model.matrix(model)
  X0 <- X; X1 <- X
  X0[, varname] <- x0
  X1[, varname] <- x1
  
  # n_obs x n_sim
  p0 <- plogis(X0 %*% t(S))
  p1 <- plogis(X1 %*% t(S))
  
  FD_mat <- p1 - p0
  
  # average across observations for each sim
  p0_sim <- colMeans(p0)
  p1_sim <- colMeans(p1)
  fd_sim <- colMeans(FD_mat)
  
  list(
    mean_p0 = mean(p0_sim),
    se_p0   = sd(p0_sim),
    mean_p1 = mean(p1_sim),
    se_p1   = sd(p1_sim),
    mean_FD = mean(fd_sim),
    se_FD   = sd(fd_sim)
  )
}

fd_m1 <- compute_fd(m1, vc1, varname = "generalJDP")
fd_m2 <- compute_fd(m2, vc2, varname = "generalJDP")
fd_m3 <- compute_fd(m3, vc3, varname = "generalJDP")
fd_m4 <- compute_fd(m4, vc4, varname = "generalJDP")

# Print Panel B results (for copying into Table A4)
print(fd_m1); print(fd_m2); print(fd_m3); print(fd_m4)

# A neat combined table object for Panel B
panelB <- data.frame(
  Model = c("(1)","(2)","(3)","(4)"),
  p0 = c(fd_m1$mean_p0, fd_m2$mean_p0, fd_m3$mean_p0, fd_m4$mean_p0),
  se_p0 = c(fd_m1$se_p0, fd_m2$se_p0, fd_m3$se_p0, fd_m4$se_p0),
  p1 = c(fd_m1$mean_p1, fd_m2$mean_p1, fd_m3$mean_p1, fd_m4$mean_p1),
  se_p1 = c(fd_m1$se_p1, fd_m2$se_p1, fd_m3$se_p1, fd_m4$se_p1),
  fd = c(fd_m1$mean_FD, fd_m2$mean_FD, fd_m3$mean_FD, fd_m4$mean_FD),
  se_fd = c(fd_m1$se_FD, fd_m2$se_FD, fd_m3$se_FD, fd_m4$se_FD)
)

panelB

# Panel B: Effects in terms of predicted probabilities (JDP: 0 → 1)
#                                         (1)	        (2)	        (3)	        (4)
# Pr (Y=1) when Semi-official JDP =0    1.03e-05    5.60e-06    0.4492      0.4669
#                                      (6.14e-08)  (9.69e-07)  (0.3058)    (0.4458)
# Pr (Y=1) when Semi-official JDP =1    0.0848      0.0473      0.4705      0.4631
#                                      (0.0012)     (0.0040)   (0.4647)    (0.4649)
# First Difference (1 − 0)              0.0848      0.0473      0.0214      -0.0038
#                                      (0.0012)     (0.0040)    (0.5461)   (0.1344)


        #### Table A5 ####


# 0) Prepare variables

# Student Numbers in thousands (as in your table)
newrankdata1US[, total_avg1 := total_avg / 1000]


# 1) Models (1)-(5): Fully-approved JDP = jointstatus0

# (1) baseline
fit1 <- felm(kongstatus ~ jointstatus0 | 0 | 0 | 0, data = newrankdata1US)

# (2) + US News Ranking
fit2 <- felm(kongstatus ~ jointstatus0 + setRank | 0 | 0 | 0, data = newrankdata1US)

# (3) + State FE, cluster at state
fit3 <- felm(kongstatus ~ jointstatus0 + setRank | STABBR | 0 | STABBR, data = newrankdata1US)

# (4) + Democrat, + State FE, cluster at state
fit4 <- felm(kongstatus ~ jointstatus0 + setRank + democrat | STABBR | 0 | STABBR, data = newrankdata1US)

# (5) Full controls, + State FE, cluster at state
fit5 <- felm(
  kongstatus ~ jointstatus0 + setRank + democrat +
    factor(uni_type) + total_avg1 + ug_asian + POPPCT_URBAN |
    STABBR | 0 | STABBR,
  data = newrankdata1US
)


# 2) Stargazer output: Table A5

stargazer(
  fit1, fit2, fit3, fit4, fit5,
  type = "latex",
  
  title = "Table A5: Cross-section Analysis with Fully-approved JDP",
  dep.var.caption = "Dependent Variables",
  dep.var.labels  = "Establishment of Confucius Institutes",
  
  # IMPORTANT: order must match the appearance order in models
  covariate.labels = c(
    "Fully-approved JDP",
    "US News Ranking",
    "Democrat",
    "Private Nonprofit",
    "Private Forprofit",
    "Student Numbers",
    "Asian Students",
    "Urban Percentage"
  ),
  
  omit.stat = c("f", "ser"),
  no.space = TRUE,
  column.sep.width = "-12pt",
  
  add.lines = list(
    c("State Fixed Effect", "No", "No", "Yes", "Yes", "Yes")
  ),
  
  notes = "*p<0.1; **p<0.05; ***p<0.01. Standard errors are clustered at the state level."
)

## Table A5: Cross-section Analysis with Fully-approved JDP 
#                                           Dependent Variables
#                                    Establishment of Confucius Institutes
#                                 (1)       	(2)	        (3)	        (4)	        (5)
# Fully-approved JDP            0.169∗∗∗    0.154∗∗∗    0.152∗∗∗    0.157∗∗∗    0.108∗∗∗
#                               (0.012)     (0.012)     (0.030)     (0.029)     (0.029)
# US News Ranking                           0.081∗∗∗    0.083∗∗∗    0.086∗∗∗    0.053∗∗∗
#                                           (0.006)     (0.013)     (0.013)     (0.015)
# Democrat                                                          0.062∗∗∗    0.041∗∗
#                                                                   (0.014)     (0.016)
# Private Nonprofit                                                             −0.016∗
#                                                                               (0.009)
# Private Forprofit                                                             −0.003
#                                                                               (0.011)
# Student Numbers                                                               0.006∗∗∗
#                                                                               (0.001)
# Asian Students                                                                0.053
#                                                                               (0.034)
# Urban Percentage                                                              0.0001
#                                                                               (0.0001)
# Constant                     0.016∗∗∗    0.092∗∗∗
#                              (0.002)     (0.006)
# State Fixed Effect             No          No           Yes        Yes         Yes
# Observations                  5,000       5,000        5,000      4,672       4,672
# Adjusted R2                   0.038       0.071        0.069      0.078       0.175

        #### Table A6 ####


simulate_china <- unique(use[,c("UNITID","中方大学", "省份", "kongstatus")])[kongstatus==T,c("中方大学", "省份")] #101,有重复

#
simulation_result <- data.frame()
set.seed(1000)
for(i in 1:10000){
  
  x1 <- data.table(sample(use$UNITID, 102, replace = FALSE, prob = NULL))
  x2 <- data.table(sample(simulate_china$中方大学, 102, replace = FALSE, prob = NULL))
  y <-  data.table(cbind(x1,x2))
  colnames(y)[1] <- "UNITID"
  colnames(y)[2] <- "中方大学"
  y <- merge(use_real[,c("INSTNM", "UNITID", "STABBR", "jointstatus0", "college_in_china", "province" )], y, by = "UNITID", all.x = T)
  y <- merge(y, unique(simulate_china), by = "中方大学", all.x = T)
  print(i)
  y[is.na(中方大学), kongstatus:= F]
  y[is.na(中方大学)==F, kongstatus:= T]
  #1
  simulation_result[i, "same university"] <- as.numeric(data.table(table(y[jointstatus0 == T & kongstatus == T]$college_in_china == y[jointstatus0 == T & kongstatus == T]$中方大学, useNA = "always"))[2,2])
  
  simulation_result[i, "same province"] <- as.numeric(data.table(table(y[jointstatus0 == T & kongstatus == T]$province == y[jointstatus0 == T & kongstatus == T]$省份, useNA = "always"))[2,2]) - as.numeric(data.table(table(y[jointstatus0 == T & kongstatus == T]$college_in_china == y[jointstatus0 == T & kongstatus == T]$中方大学, useNA = "always"))[2,2])
  
  simulation_result[i, "other province"] <- as.numeric(data.table(table(y[jointstatus0 == T & kongstatus == T]$province == y[jointstatus0 == T & kongstatus == T]$省份, useNA = "always"))[1,2])
  
  #2
  simulation_result[i, "CI NO JDP YES"] <- as.numeric(data.table(table(y[jointstatus0 == T & kongstatus == F]$college_in_china == y[jointstatus0 == T & kongstatus == F]$中方大学, useNA = "always"))[1,2])
  #3
  simulation_result[i, "CI YES JDP NO"] <- as.numeric(data.table(table(y[jointstatus0 == F & kongstatus == T]$college_in_china == y[jointstatus0 == F & kongstatus == T]$中方大学, useNA = "always"))[1,2])
  #4
  simulation_result[i,"CI NO JDP NO"] <- as.numeric(data.table(table(y[jointstatus0 == F & kongstatus == F]$college_in_china == y[jointstatus0 == F & kongstatus == F]$中方大学, useNA = "always"))[1,2])
  i = i+1
}

mean(simulation_result$`same university`, na.rm = T)
mean(simulation_result$`same province`, na.rm = T)
mean(simulation_result$`other province`, na.rm = T)
mean(simulation_result$`CI NO JDP YES`, na.rm = T)
mean(simulation_result$`CI YES JDP NO`, na.rm = T)
mean(simulation_result$`CI NO JDP NO`, na.rm = T)

sd(simulation_result$`same university`, na.rm = T)
sd(simulation_result$`same province`, na.rm = T)
sd(simulation_result$`other province`, na.rm = T)
sd(simulation_result$`CI NO JDP YES`, na.rm = T)
sd(simulation_result$`CI YES JDP NO`, na.rm = T)
sd(simulation_result$`CI NO JDP NO`, na.rm = T)

## 1. CI YES JDP YES
#1.same uni
table(use_real[jointstatus0 == T & kongstatus == T]$college_in_china == use_real[jointstatus0 == T & kongstatus == T]$中方大学, useNA = "always")
# 4

#2.same province #3. different province
table(use_real[jointstatus0 == T & kongstatus == T]$province == use_real[jointstatus0 == T & kongstatus == T]$省份, useNA = "always")
# same 8-4(same university)
# different  26

## 2.CI NO JDP YES
table(use_real[jointstatus0 == T & kongstatus == F]$college_in_china == use_real[jointstatus0 == T & kongstatus == F]$中方大学, useNA = "always")
# 137

## 3.CI YES JDP NO
table(use_real[jointstatus0 == F & kongstatus == T]$college_in_china == use_real[jointstatus0 == F & kongstatus == T]$中方大学, useNA = "always")
# 62

## 4.CI NO JDP NO
table(use_real[jointstatus0 == F & kongstatus == F]$college_in_china == use_real[jointstatus0 == F & kongstatus == F]$中方大学, useNA = "always")
# 1171

# ---------- 1) Simulated: mean and sd ----------
sim_mean <- c(
  "Same University"      = mean(simulation_result$`same university`, na.rm = TRUE),
  "Same Province"        = mean(simulation_result$`same province`,   na.rm = TRUE),
  "Different Province"   = mean(simulation_result$`other province`,  na.rm = TRUE),
  "CI (n), JDP (y)"      = mean(simulation_result$`CI NO JDP YES`,    na.rm = TRUE),
  "CI (y), JDP (n)"      = mean(simulation_result$`CI YES JDP NO`,    na.rm = TRUE),
  "CI (n), JDP (n)"      = mean(simulation_result$`CI NO JDP NO`,     na.rm = TRUE)
)

sim_sd <- c(
  "Same University"      = sd(simulation_result$`same university`, na.rm = TRUE),
  "Same Province"        = sd(simulation_result$`same province`,   na.rm = TRUE),
  "Different Province"   = sd(simulation_result$`other province`,  na.rm = TRUE),
  "CI (n), JDP (y)"      = sd(simulation_result$`CI NO JDP YES`,    na.rm = TRUE),
  "CI (y), JDP (n)"      = sd(simulation_result$`CI YES JDP NO`,    na.rm = TRUE),
  "CI (n), JDP (n)"      = sd(simulation_result$`CI NO JDP NO`,     na.rm = TRUE)
)

# ---------- 2) Observed: compute from use_real ----------
use_dt <- as.data.table(use_real)

# make sure logical
use_dt[, jointstatus0 := as.logical(jointstatus0)]
use_dt[, kongstatus   := as.logical(kongstatus)]

yy <- use_dt[jointstatus0 == TRUE & kongstatus == TRUE]

# same university = count TRUE in equality
tab_uni <- table(yy$college_in_china == yy$中方大学, useNA = "always")
obs_same_univ <- as.numeric(tab_uni["TRUE"])

# province match table
tab_prov <- table(yy$province == yy$省份, useNA = "always")
# same province (excluding same university)
obs_same_prov <- as.numeric(tab_prov["TRUE"]) - obs_same_univ
# different province
obs_diff_prov <- as.numeric(tab_prov["FALSE"])

# other cells: just row counts
obs_ciN_jdpY <- nrow(use_dt[jointstatus0 == TRUE  & kongstatus == FALSE])
obs_ciY_jdpN <- nrow(use_dt[jointstatus0 == FALSE & kongstatus == TRUE ])
obs_ciN_jdpN <- nrow(use_dt[jointstatus0 == FALSE & kongstatus == FALSE])

obs_vec <- c(
  "Same University"      = obs_same_univ,
  "Same Province"        = obs_same_prov,
  "Different Province"   = obs_diff_prov,
  "CI (n), JDP (y)"      = obs_ciN_jdpY,
  "CI (y), JDP (n)"      = obs_ciY_jdpN,
  "CI (n), JDP (n)"      = obs_ciN_jdpN
)

# ---------- 3) Obs./Sim. ----------
ratio <- obs_vec / sim_mean

# ---------- 4) Build display table ----------
fmt_sim <- function(m, s){
  # show mean with 3 decimals + (sd with 3 decimals); match your style
  paste0(formatC(m, format = "f", digits = 3), "\n(",
         formatC(s, format = "f", digits = 3), ")")
}

A6 <- data.table(
  Row = c(
    "CI (y), JDP (y)",
    "Same University",
    "Same Province",
    "Different Province",
    "CI (n), JDP (y)",
    "CI (y), JDP (n)",
    "CI (n), JDP (n)"
  ),
  Simulated = c(
    "",  # group header
    fmt_sim(sim_mean["Same University"],    sim_sd["Same University"]),
    fmt_sim(sim_mean["Same Province"],      sim_sd["Same Province"]),
    fmt_sim(sim_mean["Different Province"], sim_sd["Different Province"]),
    fmt_sim(sim_mean["CI (n), JDP (y)"],    sim_sd["CI (n), JDP (y)"]),
    fmt_sim(sim_mean["CI (y), JDP (n)"],    sim_sd["CI (y), JDP (n)"]),
    fmt_sim(sim_mean["CI (n), JDP (n)"],    sim_sd["CI (n), JDP (n)"])
  ),
  Observed = c(
    "",  # group header
    as.character(obs_vec["Same University"]),
    as.character(obs_vec["Same Province"]),
    as.character(obs_vec["Different Province"]),
    as.character(obs_vec["CI (n), JDP (y)"]),
    as.character(obs_vec["CI (y), JDP (n)"]),
    as.character(obs_vec["CI (n), JDP (n)"])
  ),
  `Obs./Sim.` = c(
    "",  # group header
    formatC(ratio["Same University"],    format="f", digits=3),
    formatC(ratio["Same Province"],      format="f", digits=3),
    formatC(ratio["Different Province"], format="f", digits=3),
    formatC(ratio["CI (n), JDP (y)"],    format="f", digits=3),
    formatC(ratio["CI (y), JDP (n)"],    format="f", digits=3),
    formatC(ratio["CI (n), JDP (n)"],    format="f", digits=3)
  )
)

A6

# ---------- 5) LaTeX output (template) ----------
note_text <- paste0(
  "Note: We randomly re-assigned the existing CIs to a potential pool of top 1337 universities in the US with US news rankings, ",
  "fixing the JDP networks and the CIs’ Chinese partners. As some US universities host more than one JDP with Chinese universities, ",
  "the observed number of US university is 1404. For each university in the US, the CI status is (y or n), the JDP status is (y or n). ",
  "Within the type of universities that have both CI and JDP with China (i.e., CI y, JDP y), its Chinese CI partner could be its JDP partner ",
  "(“Same University”), or another university from the same province of its JDP partner (“Same Province”), or a university from another province ",
  "(“Different Province”). The number of simulation equals to 10,000. Standard deviations are in the brackets."
)

latex_A6 <- function(tab, caption = "Table A6: Simulated vs. Observed Joint Distribution of CIs and JDPs in the US") {
  lines <- c()
  lines <- c(lines, "\\begin{table}[!htbp]\\centering")
  lines <- c(lines, paste0("\\caption{", caption, "}"))
  lines <- c(lines, "\\begin{tabular}{lccc}")
  lines <- c(lines, "\\hline\\hline")
  lines <- c(lines, " & (1) Simulated & (2) Observed & (3) Obs./Sim. \\\\")
  lines <- c(lines, "\\hline")
  
  for(i in 1:nrow(tab)){
    r <- tab[i]
    if (r$Row == "CI (y), JDP (y)") {
      lines <- c(lines, "\\multicolumn{4}{l}{\\textit{CI (y), JDP (y)}}\\\\")
      next
    }
    # keep line breaks in simulated cell
    sim_cell <- gsub("\n", " ", r$Simulated)   # LaTeX: you can replace with \\ if you want
    # Better: show (sd) on next line
    sim_cell <- gsub("\\) \\(", ")\\\\ (", sim_cell, fixed = TRUE)
    
    lines <- c(lines, paste0(
      r$Row, " & ", sim_cell, " & ", r$Observed, " & ", r$`Obs./Sim.`, " \\\\"
    ))
    lines <- c(lines, "\\addlinespace")
  }
  
  lines <- c(lines, "\\hline\\hline")
  lines <- c(lines, paste0("\\multicolumn{4}{p{0.95\\linewidth}}{\\footnotesize ", note_text, "}\\\\"))
  lines <- c(lines, "\\end{tabular}")
  lines <- c(lines, "\\end{table}")
  cat(paste(lines, collapse = "\n"))
}

latex_A6(A6)

